#!/usr/bin/env python3
# Copyright (c) 2020-2021 maminjie <maminjie1@huawei.com>
# SPDX-License-Identifier: MulanPSL-2.0

"""
This module provides the code statistic class
"""

import re
import time
import sys
import os

from oec.apps import openeuler

class CodeStat(object):
    """
    Collect statistics on the amount of code submitted to openEuler
    """
    def __init__(self):
        """Create the OpenEuler request object that every query method of this class uses."""
        self.req = openeuler.OpenEuler()

    def get_contributors(self, owner, repo):
        """
        Collect the code committers of owner/repo together with their commit counts

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository

        Returns:
          contributors dict
            key - [str] the contributor name
            value - [int] the commit numbers
        """
        result = {}
        for entry in self.req.get_contributors(owner, repo):
            name, count = entry.get("name"), entry.get("contributions")
            # Records without a name, or with an empty/zero count, are dropped.
            if not (name and count):
                continue
            result[name] = count
        return result

    def get_branches(self, owner, repo):
        """
        List the branch names of owner/repo

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository

        Returns:
          branch list
            member - [str] the branch name
        """
        raw = self.req.get_branches(owner, repo)
        names = (item.get("name") for item in raw)
        # Records with a missing or empty name are left out.
        return [name for name in names if name]

    def _get_commit_time(self, ci_dict):
        """Return the author "date" of a raw commit record, or None when it has no author."""
        author = ci_dict.get("author")
        if not author:
            return None
        return author.get("date")

    def _get_one_commit(self, info_dict):
        """
        Reduce one raw commit record to the fields used by the statistics

        Parameters:
          info_dict - [dict] raw record holding the "commit" and "stats" sub-dicts

        Returns:
          commit dict, {} when a required field is missing or a counter is not an int
            key - [str] id,   addition, deletion, total, time
            value -    [str]    [int]    [int]    [int]  [str]
        """
        raw_commit = info_dict.get("commit")
        when = self._get_commit_time(raw_commit) if raw_commit else None
        if not when:
            return {}

        stats = info_dict.get("stats")
        if not stats:
            return {}
        # The commit id is read from the "stats" sub-dict.
        sha = stats.get("id")
        if not sha:
            return {}

        counts = [stats.get(key) for key in ("additions", "deletions", "total")]
        if not all(isinstance(value, int) for value in counts):
            return {}
        added, removed, changed = counts
        return {
            "id": sha,
            "addition": added,
            "deletion": removed,
            "total": changed,
            "time": when,
        }

    def get_commits(self, owner, repo, sha="", author="", since="", until="", page=1, per_page=20):
        """
        Fetch one page of commits of owner/repo, optionally filtered by sha, author and time

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository
          sha - [str] SHA value or branch name, "" represents the default branch
          author - [str] the username or login name
          since - [str] the start time
          until - [str] the end time
          page - [int] the current page
          per_page - [int] numbers of per page, max value is 100

        Returns:
          commits list, the member is dict
            key - [str] id,   addition, deletion, total, time
            value -    [str]    [int]    [int]    [int]  [str]
        """
        fetched = self.req.get_commits(owner, repo, sha, author, since, until, page, per_page)
        parsed = map(self._get_one_commit, fetched)
        # Records that could not be parsed come back as {} and are dropped.
        return [item for item in parsed if item]

    def get_one_commit(self, owner, repo, sha):
        """
        Fetch a single commit of owner/repo identified by sha

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository
          sha - [str] SHA value or branch name, branch=last commit SHA value

        Returns:
          commit dict, {} when nothing usable was returned
            key - [str] id,   addition, deletion, total, time
            value -    [str]    [int]    [int]    [int]  [str]
        """
        raw = self.req.get_one_commit(owner, repo, sha)
        return self._get_one_commit(raw) if raw else {}

    def _get_pr_type_by_patchdata(self, patchdata):
        """
        Classify a pull request from the text of its patch

        The checks run in this order and the first hit wins:
          init    - a *.spec file is created
          upgrade - a source archive is created or deleted and the added
                    "Version:" line is newer than the removed one
          patch   - a *.patch file is created
          other   - none of the above

        Parameters:
          patchdata - [str] the patch text

        Returns:
          [str] init, upgrade, patch or other
        """
        def version_key(version):
            # Compare versions run by run instead of as plain strings, so that
            # "1.10" is newer than "1.9". Numeric runs rank above alphabetic ones.
            key = []
            for token in re.findall(r"\d+|[A-Za-z]+", version):
                if token.isdigit():
                    key.append((1, int(token), ""))
                else:
                    key.append((0, 0, token))
            return key

        if re.search(r"create mode .*\.spec", patchdata):
            return "init"

        archive = r"(?:create|delete) mode.*(?:\.gz|\.tgz|\.jar|\.bz2|\.xz|\.tar|\.zip|\.gem)"
        if re.search(archive, patchdata):
            version_old = re.findall(r"-Version:\s+(.*)", patchdata)
            version_new = re.findall(r"\+Version:\s+(.*)", patchdata)
            # Only the first removed/added Version line is considered.
            if version_old and version_new:
                if version_key(version_new[0]) > version_key(version_old[0]):
                    return "upgrade"

        if re.search(r"create mode .*\.patch", patchdata):
            return "patch"
        return "other"

    def _get_pr_type(self, title, patchaddr):
        """
        Decide the type of a pull request from its title and, if needed, its patch

        Parameters:
          title - [str] the pull request title
          patchaddr - [str] address of the patch text, may be empty or None

        Returns:
          [str] init, upgrade, patch, bugfix or other
        """
        # A keyword in the title (case-insensitive) settles the type immediately.
        for keyword in ("init", "upgrade"):
            if re.search(keyword, title, re.I):
                return keyword

        patch_text = None
        if patchaddr:
            try:
                patch_text = self.req.get_request(patchaddr)
            except UnicodeDecodeError:
                # A patch that cannot be decoded is treated as if there were none.
                patch_text = None

        kind = self._get_pr_type_by_patchdata(patch_text) if patch_text else "other"
        # Only when the patch gave no answer does "fix" in the title count.
        if kind == "other" and re.search("fix", title, re.I):
            return "bugfix"
        return kind

    def _get_pr_less(self, info_dict):
        """
        Reduce one raw pull request record to its basic fields (no "type")

        Parameters:
          info_dict - [dict] raw pull request record

        Returns:
          pr dict, {} when any required field is missing or empty
            key - [str] number, state, title, user, firstCommitId, lastCommitId
        """
        number, state, title, statuses_url = (
            info_dict.get(key) for key in ("number", "state", "title", "statuses_url"))
        if not (number and state and title and statuses_url):
            return {}

        head = info_dict.get("head")
        if not head:
            return {}
        # last commit id
        tip_sha = head.get("sha")
        author = head.get("user") if tip_sha else None
        login = author.get("login") if author else None
        if not login:
            return {}

        return {
            "number": number,
            "state": state,
            "title": title,
            "user": login,
            # first commit id: the last path segment of statuses_url
            "firstCommitId": str(statuses_url).rsplit("/", 1)[-1],
            "lastCommitId": tip_sha,
        }

    def _get_pr(self, info_dict):
        """Return the basic pull request dict extended with its "type", or {} if the record is unusable."""
        pr = self._get_pr_less(info_dict)
        if not pr:
            return pr
        patch_url = info_dict.get("patch_url")
        pr["type"] = self._get_pr_type(pr["title"], patch_url)
        return pr

    def get_pr_list(self, owner, repo, head="", base="", state="all", page=1, per_page=20):
        """
        Fetch one page of pull requests of owner/repo, optionally filtered by head, base, state

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository
          head - [str] source branch, format: branch or username:branch
          base - [str] target branch
          state - [str] the state of pr, contains: open, closed, merged, all
          page - [int] the current page
          per_page - [int] numbers of per page, max value is 100

        Returns:
          pr list, the member is dict
            key - [str] number, state, type, title, user, firstCommitId, lastCommitId
            value -     [int]   [str]  [str] [str]  [str]     [str]         [str]
        """
        fetched = self.req.get_pr_list(owner, repo, head, base, state, page, per_page)
        parsed = map(self._get_pr, fetched)
        # Unusable records come back as {} and are dropped.
        return [item for item in parsed if item]

    def get_pr_list_less(self, owner, repo, head="", base="", state="all", page=1, per_page=20):
        """
        Fetch one page of pull requests of owner/repo without working out the pr type

        Takes the same parameters as get_pr_list; the returned dicts have the
        keys number, state, title, user, firstCommitId, lastCommitId (no type).
        """
        fetched = self.req.get_pr_list(owner, repo, head, base, state, page, per_page)
        parsed = map(self._get_pr_less, fetched)
        return [item for item in parsed if item]

    def _get_build_status(self, data_str):
        """Map a comment text to "fail", "warning" or "success"; "" when no marker is found."""
        # Checked in order, so a failure marker outranks a warning, which outranks success.
        verdicts = (
            ("FAILED|FAILURE", "fail"),
            ("WARNING", "warning"),
            ("SUCCESS", "success"),
        )
        for pattern, verdict in verdicts:
            if re.search(pattern, data_str):
                return verdict
        return ""

    def _review_is_valid(self, data_str):
        """Return False for comments containing "/retest", True for everything else."""
        return re.search("/retest", data_str) is None

    def get_pr_comments(self, owner, repo, number):
        """
        Summarise all comments of one pull request of owner/repo.

        Every page of comments is read until an empty page comes back.
        Comments by openeuler-ci-bot only contribute to failCnt, comments by
        openeuler-sync-bot are ignored, and every other comment that is not a
        "/retest" request counts as a review.

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository
          number - [int] the number of pr

        Returns:
          comments dict
            key - [str] number, failCnt, reviewCnt, reviewer
            value -     [int]    [int]     [int]     [list]
          each reviewer entry is a dict with the keys name and body
        """
        failures = 0
        reviews = []
        page_no = 1

        batch = self.req.get_pr_comments(owner, repo, number, page_no)
        while len(batch) > 0:
            for comment in batch:
                author = comment.get("user")
                login = author.get("login") if author else None
                text = comment.get("body") if login else None
                # Skip comments with no author, no login or no body.
                if not text:
                    continue
                if login == "openeuler-ci-bot":
                    if self._get_build_status(str(text)) == "fail":
                        failures += 1
                elif login != "openeuler-sync-bot" and self._review_is_valid(str(text)):
                    reviews.append({"name": login, "body": text})
            page_no += 1
            batch = self.req.get_pr_comments(owner, repo, number, page_no)

        return {
            "number": number,
            "failCnt": failures,
            "reviewCnt": len(reviews),
            "reviewer": reviews,
        }

    def get_reviewers(self, owner, repo):
        """
        List the reviewer logins of owner/repo.

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository

        Returns:
          reviewer list
            member - [str] username
        """
        raw = self.req.get_reviewers(owner, repo)
        logins = (item.get("login") for item in raw)
        # Records with a missing or empty login are left out.
        return [login for login in logins if login]

    def get_org_repos(self, org, repo_type="all", page=1, per_page=20):
        """
        Fetch one page of repository names of an organization

        Parameters:
          org - [str] the organization name
          repo_type - [str] the repository type, as follows: all, public, private
          page - [int] the current page
          per_page - [int] numbers of per page, max value is 100

        Returns:
          repo list
            member - [str] the repository name
        """
        raw = self.req.get_org_repos(org, repo_type, page, per_page)
        full_names = (item.get("full_name") for item in raw)
        # Keep only the part after the last "/", i.e. drop the org path.
        return [full.rsplit("/", 1)[-1] for full in full_names if full]

    def _check_pr_commit_id(self, ci_dict, pr_dict):
        """Return True when the commit id equals the first or last commit id of the pull request."""
        first, last = pr_dict["firstCommitId"], pr_dict["lastCommitId"]
        # A pull request with an empty boundary id never matches.
        if first == "" or last == "":
            return False
        commit_id = ci_dict["id"]
        return commit_id == first or commit_id == last

    def _check_commit_belong_pr(self, owner, repo, ci_dict, pr_dict):
        """
        Tell whether a commit belongs to a pull request

        The commit belongs to the pull request when its id is the first or
        last commit id of the pull request, or when its time lies strictly
        between the times of those two commits.

        Parameters:
          owner - [str] the project path
          repo - [str] the code repository
          ci_dict - [dict] commit with the keys id and time
          pr_dict - [dict] pull request with the keys firstCommitId and lastCommitId

        Returns:
          [bool]
        """
        first, last = pr_dict["firstCommitId"], pr_dict["lastCommitId"]
        commit_id = ci_dict["id"]
        if commit_id == first or commit_id == last:
            return True
        if first == "" or last == "":
            return False

        # Both boundary commits are fetched to compare times.
        first_ci = self.get_one_commit(owner, repo, first)
        last_ci = self.get_one_commit(owner, repo, last)
        if not (first_ci and last_ci):
            return False
        # NOTE(review): the time values are compared as returned, without
        # parsing - presumably they all share one format and zone; confirm.
        return first_ci["time"] < ci_dict["time"] < last_ci["time"]

    def _convert_time_with_zone(self, time_str):
        """
        Reformat a timestamp like 2021-01-02T03:04:05+08:00 as 2021-01-02 03:04:05

        The date and clock fields are kept as written; the UTC offset is only
        parsed and then left out of the result.

        Parameters:
          time_str - [str] time in the form %Y-%m-%dT%H:%M:%S followed by a
                     UTC offset written as +HH:MM, -HH:MM, +HHMM or -HHMM

        Returns:
          [str] the time formatted as %Y-%m-%d %H:%M:%S

        Raises:
          ValueError - when time_str does not have the expected form
        """
        tmp_str = time_str
        # Older Python 3 releases reject a colon inside the %z offset, so a
        # trailing "+HH:MM" or "-HH:MM" is rewritten without the colon.
        match_obj = re.search(r"([+-]\d\d):(\d\d)$", time_str)
        if match_obj:
            tmp_str = time_str[:match_obj.start()] + match_obj.group(1) + match_obj.group(2)
        time_array = time.strptime(tmp_str, "%Y-%m-%dT%H:%M:%S%z")
        return time.strftime("%Y-%m-%d %H:%M:%S", time_array)

    def _commit_statistic(self, user, owner, repo, branch, ci_dict):
        page = 0
        per_page = 50
        reject_cnt = 0
        prc_dict = {}
        pr_type = "other"
        pr_title = ""
        pr_author = ""
        stat = {}

        # TODO：Traverse pr per commit, need to be optimized
        pr_list = self.get_pr_list(owner, repo, "", branch, page=page, per_page=per_page)
        while len(pr_list):
            for pr_dict in pr_list:
                if pr_dict.get("user") == user:
                    if self._check_commit_belong_pr(owner, repo, ci_dict, pr_dict) == False:
                        continue
                else: # user is not equal
                    if self._check_pr_commit_id(ci_dict, pr_dict) == False:
                        continue
                state = pr_dict["state"]
                if state == "closed":
                    reject_cnt += 1
                elif state == "merged":
                    pr_type = pr_dict["type"]
                    pr_title = pr_dict["title"]
                    pr_author = pr_dict["user"]
                    prc_dict = self.get_pr_comments(owner, repo, pr_dict["number"])
            page += 1
            pr_list = self.get_pr_list(owner, repo, "", branch, page=page, per_page=per_page)

        if not prc_dict:
            return stat
        stat["user"] = user
        stat["repo"] = owner + "/" + repo
        stat["branch"] = branch
        stat["state"] = "merged"
        stat["prNum"] = prc_dict.get("number")
        stat["prType"] = pr_type
        stat["prTitle"] = pr_title
        stat["prAuthor"] = pr_author
        stat["failCnt"] = prc_dict.get("failCnt")
        stat["rejectCnt"] = reject_cnt
        stat["addition"] = ci_dict["addition"]
        stat["deletion"] = ci_dict["deletion"]
        stat["total"] = ci_dict["total"]
        stat["reviewCnt"] = prc_dict.get("reviewCnt")
        stat["reviewer"] = prc_dict.get("reviewer")
        stat["time"] = self._convert_time_with_zone(ci_dict["time"])
        return stat

    def _contribute_commit_statistic(self, user, owner, repo, branch, ci_dict):
        page = 0
        per_page = 50
        reject_cnt = 0
        prc_dict = {}
        stat = {}

        pr_list = self.get_pr_list_less(owner, repo, "", branch, page=page, per_page=per_page)
        while len(pr_list):
            for pr_dict in pr_list:
                if pr_dict.get("user") == user:
                    if self._check_commit_belong_pr(owner, repo, ci_dict, pr_dict) == False:
                        continue
                else: # user is not equal
                    if self._check_pr_commit_id(ci_dict, pr_dict) == False:
                        continue
                state = pr_dict["state"]
                if state == "closed":
                    reject_cnt += 1
                elif state == "merged":
                    prc_dict = self.get_pr_comments(owner, repo, pr_dict["number"])
            page += 1
            pr_list = self.get_pr_list_less(owner, repo, "", branch, page=page, per_page=per_page)

        if not prc_dict:
            return stat
        stat["prNum"] = prc_dict.get("number")
        stat["failCnt"] = prc_dict.get("failCnt")
        stat["rejectCnt"] = reject_cnt
        stat["addition"] = ci_dict["addition"]
        stat["deletion"] = ci_dict["deletion"]
        stat["total"] = ci_dict["total"]
        stat["reviewCnt"] = prc_dict.get("reviewCnt")
        return stat

    def _merge_statistic(self, user, owner, repo, branch, start_time, end_time):
        """
        Collect the merge statistics of every commit of user on one branch

        Commits are read page by page (50 per page) until an empty page comes
        back; commits that yield no statistic are left out.

        Returns:
          statistic list, the member is the dict built by _commit_statistic
        """
        collected = []
        page = 1
        while True:
            commits = self.get_commits(owner, repo, branch, user, start_time, end_time, page, 50)
            if len(commits) == 0:
                break
            per_commit = (self._commit_statistic(user, owner, repo, branch, commit)
                          for commit in commits)
            collected.extend(item for item in per_commit if item)
            page += 1
        return collected

    def merge_statistics(self, user, owner, repo, branch="all", start_time="", end_time=""):
        """
        Collect the merge statistics of user in owner/repo.

        With branch="all" every branch of the repository is scanned; otherwise
        only the named branch, and nothing at all if that branch does not exist.

        Parameters:
          user - [str] username (gitee ID)
          owner - [str] the project path
          repo - [str] the code repository
          branch - [str] the query branch
          start_time - [str] the start time
          end_time - [str] the end time

        Returns:
          statistic list, the member is dict
            key - [str] user, repo, branch, state, prNum, prType, prTitle, prAuthor, failCnt, rejectCnt,
            value -     [str] [str] [str]   [str]   [int]  [str]    [str]    [str]    [int]     [int]
            key - [str] addition, deletion, total, reviewCnt, reviewer, time
            value -      [int]     [int]    [int]   [int]      [list]   [str]
        """
        # The contributor pre-check is switched off for now because of a
        # problem with the gitee api.
        targets = self.get_branches(owner, repo)
        if branch != "all":
            targets = [branch] if branch in targets else []

        stats = []
        for br in targets:
            stats.extend(self._merge_statistic(user, owner, repo, br, start_time, end_time))
        return stats

    def _contribute_statistic(self, user, owner, repo, branch, start_time, end_time):
        """
        Sum up the contribution counters of user on one branch

        Commits are read page by page (50 per page) until an empty page comes
        back. Each commit with a merged pull request adds to commitCnt and to
        the per-commit counters; mergeCnt and rejectCnt grow only the first
        time a pull request number is seen.

        Returns:
          statistic dict
            key - [str] commitCnt, mergeCnt, rejectCnt, failCnt, addition, deletion, total, reviewCnt
        """
        per_commit_keys = ("failCnt", "addition", "deletion", "total", "reviewCnt")
        totals = dict.fromkeys(("commitCnt", "mergeCnt", "rejectCnt") + per_commit_keys, 0)
        seen_prs = []
        page = 1

        commits = self.get_commits(owner, repo, branch, user, start_time, end_time, page, 50)
        while len(commits) != 0:
            for commit in commits:
                one = self._contribute_commit_statistic(user, owner, repo, branch, commit)
                if not one:
                    continue
                if one["prNum"] not in seen_prs:
                    seen_prs.append(one["prNum"])
                    totals["mergeCnt"] += 1
                    # TODO: The commits of the same pr takes first rejectcnt, needs to be corrected
                    totals["rejectCnt"] += one["rejectCnt"]
                totals["commitCnt"] += 1
                for key in per_commit_keys:
                    totals[key] += one[key]
            page += 1
            commits = self.get_commits(owner, repo, branch, user, start_time, end_time, page, 50)
        return totals

    def contribute_statistics(self, user, owner, repo, branch="all", start_time="", end_time=""):
        """
        Sum up the contribution counters of user in owner/repo.

        With branch="all" every branch of the repository is scanned; otherwise
        only the named branch, and nothing at all if that branch does not exist.

        Parameters:
          user - [str] username (gitee ID)
          owner - [str] the project path
          repo - [str] the code repository
          branch - [str] the query branch
          start_time - [str] the start time
          end_time - [str] the end time

        Returns:
          statistic dict
            key - [str] user, commitCnt, mergeCnt, rejectCnt, failCnt, addition, deletion, total, reviewCnt
            value -     [str]  [int]      [int]      [int]     [int]     [int]     [int]   [int]   [int]
        """
        # The contributor pre-check is switched off for now because of a
        # problem with the gitee api.
        counter_keys = ("commitCnt", "mergeCnt", "rejectCnt", "failCnt",
                        "addition", "deletion", "total", "reviewCnt")
        totals = {"user": user}
        totals.update(dict.fromkeys(counter_keys, 0))

        targets = self.get_branches(owner, repo)
        if branch != "all":
            targets = [branch] if branch in targets else []

        for br in targets:
            partial = self._contribute_statistic(user, owner, repo, br, start_time, end_time)
            for key in counter_keys:
                totals[key] += partial[key]
        return totals

